syntax = "proto3";

package tensorflow.profiler;

import "google/protobuf/any.proto";
import "tensorflow/core/profiler/protobuf/diagnostics.proto";

// Bottleneck analysis for generic hardware: the share of step time spent on
// input, output, idle and compute, plus a classification and a human-readable
// statement for each potential source of overhead.
// InputPipelineAnalysisRecommendation.bottleneck_analysis is documented as
// unpackable into this message.
//
// Fields are declared in logical order, not field-number order. The field
// numbers identify the fields on the wire and must not be changed.
message BottleneckAnalysis {
  // Percentage of step time that is spent on input.
  double input_percent = 7;
  // Percentage of step time that is spent on output.
  double output_percent = 8;
  // Percentage of step time that is idle for non-I/O-related reasons.
  double idle_percent = 9;
  // Percentage of step time that is spent on compute.
  double compute_percent = 10;
  // Indicates if input is a bottleneck. Possible values: "host", "device",
  // "both", or "unknown".
  string input_classification = 1;
  // A human-readable description of the input bottleneck.
  string input_statement = 2;
  // Indicates if kernel launching is a bottleneck. Possible values: "no",
  // "moderate", "high".
  string kernel_launch_classification = 3;
  // A human-readable description of the kernel launching overhead.
  string kernel_launch_statement = 4;
  // Indicates if the "all other" overhead is a bottleneck. Possible values:
  // "no", "moderate", "high".
  // NOTE(review): presumably "all other" is the overhead not covered by the
  // other classifications in this message -- confirm with the producer.
  string all_other_classification = 5;
  // A human-readable description of the "all other" overhead.
  string all_other_statement = 6;
  // Indicates if device collective communication is a bottleneck. Possible
  // values: "no", "moderate", "high".
  string device_collectives_classification = 11;
  // A human-readable description of the device collective communication
  // overhead.
  string device_collectives_statement = 12;
}

// Summary statistics over a set of values. Used for both step duration and Op
// duration; in this file it is also used for percentage values (see
// InputPipelineAnalysisResult.input_percent_summary).
// The unit is not fixed by this message: it is implied by the field that
// holds the summary (for example, the *_ms_summary fields of
// GenericStepTimeBreakdown are documented as being in ms).
message StepSummary {
  // Average of the summarized values.
  double average = 1;
  // Standard deviation of the summarized values.
  // NOTE(review): whether this is the population or the sample standard
  // deviation is not stated here -- confirm with the producer.
  double standard_deviation = 2;
  // Minimum of the summarized values.
  double minimum = 3;
  // Maximum of the summarized values.
  double maximum = 4;
}

// Details of a single step on generic hardware: its identity, its total time
// and how that time splits across event categories. All times are in ms.
// InputPipelineAnalysisResult.step_details is documented as unpackable into
// this message.
message PerGenericStepDetails {
  // Field number 4 is reserved so that it cannot be assigned to a new field.
  reserved 4;

  // Number of this step.
  int32 step_number = 1;

  // Name of this step.
  string step_name = 14;

  // Total time of this step (in ms).
  double step_time_ms = 2;

  // The remaining fields break the step time down into event categories.

  // Time of unknown category (in ms).
  double unknown_time_ms = 3;

  // Time (in ms) the host spends waiting for input data to be ready.
  double host_wait_input_ms = 11;

  // Time (in ms) the host spends sending input data to the device.
  // Total input time = host_wait_input_ms + host_to_device_ms.
  double host_to_device_ms = 12;

  // Output time (in ms).
  double output_ms = 5;

  // Device-compute time (in ms).
  double device_compute_ms = 6;

  // Device-to-device communication time (in ms).
  double device_to_device_ms = 7;

  // Device time spent on collective communications (in ms).
  double device_collectives_ms = 13;

  // Host-compute time (in ms).
  double host_compute_ms = 8;

  // Host-prepare time (in ms).
  double host_prepare_ms = 9;

  // Time spent on compiling (in ms).
  double host_compile_ms = 10;
}

// Breakdown of the input processing time by category. All fields are in
// microseconds.
message InputTimeBreakdown {
  // Time spent on demanded file read in microseconds.
  double demanded_file_read_us = 1;
  // Time spent on advanced file read in microseconds.
  double advanced_file_read_us = 2;
  // Time spent on data preprocessing in microseconds.
  double preprocessing_us = 3;
  // The infeed enqueue time in microseconds.
  double enqueue_us = 4;
  // Non-enqueue input time, in microseconds, for the situation where it
  // cannot be broken down further (because the input pipeline is not
  // instrumented).
  double unclassified_non_enqueue_us = 5;
}

// Details of one input Op. The time fields are accumulated over all
// occurrences of the Op.
message InputOpDetails {
  // The Op's name.
  string op_name = 1;
  // The number of occurrences.
  uint64 count = 2;
  // Time (accumulated over all occurrences) in milliseconds.
  double time_in_ms = 3;
  // Time (accumulated over all occurrences) as a percentage of the total
  // input processing time.
  double time_in_percent = 4;
  // Self time (accumulated over all occurrences) in milliseconds.
  double self_time_in_ms = 5;
  // Self time (accumulated over all occurrences) as a percentage of the total
  // input processing time.
  double self_time_in_percent = 6;
  // Category of the Op. Possible values: "Enqueue", "Advanced file read",
  // "Demanded file read", "Preprocessing", "Unknown".
  string category = 7;
}

// Recommendation attached to an input-pipeline analysis: detailed
// recommendations, a bottleneck analysis and a suggested next step.
message InputPipelineAnalysisRecommendation {
  // A list of detailed recommendations.
  repeated string details = 1;
  // An analysis of different types of bottlenecks. Stored as an Any; can be
  // unpacked into a BottleneckAnalysis.
  google.protobuf.Any bottleneck_analysis = 2;
  // A suggested step to take next.
  string summary_next_step = 3;
}

// Breakdown of the step time on generic hardware. Each field is a StepSummary
// of one category of time spent as part of a step, in ms.
// InputPipelineAnalysisResult.step_time_breakdown is documented as unpackable
// into this message.
message GenericStepTimeBreakdown {
  // Field number 2 is reserved so that it cannot be assigned to a new field.
  reserved 2;

  // Unknown time as a part of step, in ms.
  StepSummary unknown_time_ms_summary = 1;

  // Host-wait-input time as a part of step, in ms.
  StepSummary host_wait_input_ms_summary = 9;

  // Host-to-device time as a part of step, in ms.
  StepSummary host_to_device_ms_summary = 10;

  // Input time as a part of step, in ms.
  StepSummary input_ms_summary = 11;

  // Output time as a part of step, in ms.
  StepSummary output_ms_summary = 3;

  // Device-compute time as a part of step, in ms.
  StepSummary device_compute_ms_summary = 4;

  // Device-to-device time as a part of step, in ms.
  StepSummary device_to_device_ms_summary = 5;

  // Device-collectives time as a part of step, in ms.
  StepSummary device_collectives_ms_summary = 12;

  // Host-compute time as a part of step, in ms.
  StepSummary host_compute_ms_summary = 6;

  // Host-prepare time as a part of step, in ms.
  StepSummary host_prepare_ms_summary = 7;

  // Compilation time as a part of step, in ms.
  StepSummary host_compile_ms_summary = 8;
}

// Result of the input-pipeline analysis: step-time statistics, the split of
// step time into input/output/idle/compute, per-step details, input Op
// details, a recommendation and diagnostics.
message InputPipelineAnalysisResult {
  // Field numbers 1 and 10 are reserved so that they cannot be assigned to
  // new fields.
  reserved 1, 10;

  // Indicates the format of step_details and step_time_breakdown: true if
  // they are in the Google-confidential format; otherwise they are in the
  // format of PerGenericStepDetails and GenericStepTimeBreakdown
  // respectively.
  bool tag = 16;

  // Type of the hardware.
  string hardware_type = 9;

  // Summary of the step duration across all cores.
  StepSummary step_time_summary = 2;

  // Summary of the input-related stall, as a percentage of step duration.
  StepSummary input_percent_summary = 3;

  // Percentage of step time spent waiting for input.
  double input_percent = 11;

  // Percentage of step time spent doing output.
  double output_percent = 13;

  // Percentage of step time that is idle for non-I/O-related reasons.
  double idle_percent = 14;

  // Percentage of step time spent doing compute.
  double compute_percent = 15;

  // Details of each step. Each entry can be unpacked into a
  // PerGenericStepDetails.
  repeated google.protobuf.Any step_details = 4;

  // Breakdown of the input processing time.
  InputTimeBreakdown input_time_breakdown = 5;

  // Details of each input Op executed.
  repeated InputOpDetails input_op_details = 6;

  // Recommendation to users for next steps.
  InputPipelineAnalysisRecommendation recommendation = 7;

  // Breakdown of the step time. Can be unpacked into a
  // GenericStepTimeBreakdown.
  google.protobuf.Any step_time_breakdown = 8;

  // Error and warning messages for diagnosing profiling issues.
  Diagnostics diagnostics = 12;
}
